Storm Track Analysis

The tracking algorithm is a fork of the Tint (Tint is not Titan) tracking algorithm (http://openradarscience.org/TINT/). The framework has been modified so that it can also be applied to model output data that is not stored in a Py-ART radar data container.

  • The analysis is based on averages of one week's worth of storm cell tracking:

1. Median of storm area, duration, avg. and max. rain-rates, and # of storm cells

In [725]:
#Plot the Medians
# Build a summary table of median storm properties (area, duration, mean and
# max rain-rate), a hard-coded distance estimate, and the storm count for the
# bootstrap sample, CPOL observations and the two UM ensembles.
um044_n = len(UM044_ens.coords['dim_0'])  # number of tracked storms, UM 0.44 km ensemble
um133_n = len(UM133_ens.coords['dim_0'])  # number of tracked storms, UM 1.33 km ensemble
cpol_n = len(OBS.dataset['total'].coords['dim_0'])   # bootstrap sample size
cpol_n2 = len(CPOL.coords['dim_0'])                  # CPOL observation count
# presumably flattens the observational dataset into a DataFrame-like object
# with the same columns as CPOL_pdf — TODO confirm what `ravel` does here
CPOL_pdf2 = ravel(OBS.dataset['obs'])
var=['avg_area','dur', 'avg_mean', 'max_mean']
names=['area', 'duration', 'avg-rain', 'max-rain', 'distance', '# storms']
#print(CPOL_pdf[var].median())
# The distance rows are hard-coded scalings (e.g. 2.5*16.02); their provenance
# is not visible in this cell — TODO document where these factors come from.
# NOTE(review): the 'a'/'b'/'c'/'d' keys rely on pandas sorting dict keys
# alphabetically so that columns come out a,b,c,d; with pandas >= 0.23 on
# Python >= 3.6 insertion order (a,b,d,c) is preserved instead, which would
# swap the UM 1.33km / UM 0.44km labels below — verify against pandas version.
medians = pd.DataFrame({'a': list(CPOL_pdf[var].median().values)+[2.5*16.02,int(cpol_n)],
                        'b': list(CPOL_pdf2[var].median().values)+[2.5*16.42,int(cpol_n2)],
                        'd': list(UM044_pdf[var].median().values)+[2*10.01,int(um044_n)],
                        'c': list(UM133_pdf[var].median().values)+[2*10.62,int(um133_n)]} )
                       #index=('Area','Duration', 'Mean-Rain', 'Max-Rain'))
medians.index=names
medians.columns=['Bootstrap', 'Cpol', 'UM 1.33km', 'UM 0.44km']
#print('Medians:')
                 
# Bare last expression: display the rounded table as rich cell output.
medians.round(2)
Out[725]:
Bootstrap Cpol UM 1.33km UM 0.44km
area 108.33 119.38 75.52 57.92
duration 60.00 74.67 60.00 50.00
avg-rain 4.52 4.50 4.78 5.55
max-rain 6.58 6.60 6.88 8.26
distance 40.05 41.05 21.24 20.02
# storms 1184.00 42.00 50.00 73.00

2. 2D-Histograms (storm area vs duration):

In [712]:
#Create Hex-Bin plot
# 2D histograms (hex-bin) of storm area vs. duration, one panel per dataset
# (Bootstrap, CPOL, UM 1.33km ens, UM 0.44km ens), each normalised to a
# density, with the per-dataset medians marked by red cross-hairs.
mpld3.disable_notebook()
var = ['avg_area', 'dur', 'max_mean', 'avg_mean']
medians = pd.DataFrame({'Bootstrap': CPOL_pdf[var].median(),
                        'CPOL': CPOL_pdf2[var].median(),
                        'UM 1.33km': UM133_pdf[var].median(),
                        'UM 0.44km': UM044_pdf[var].median()})
# Datasets and panel titles run Bootstrap, CPOL, UM 1.33km, UM 0.44km
# (left to right) after the [::-1] reversal.
histdata = [UM044_pdf[var].dropna(), UM133_pdf[var].dropna(),
            CPOL_pdf2[var].dropna(), CPOL_pdf[var]][::-1]
titles = ['UM 0.44km ens', 'UM 1.33km ens', 'CPOL', 'Bootstrap'][::-1]
colm = colmap2.Blues
colm.set_under('w', alpha=0)  # bins below vmin are drawn transparent
# NOTE: the original also created an unused `plt.figure(figsize=(10,8))` here,
# immediately shadowed by plt.subplots below; the leaked figure was removed.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, sharey=True)
fig.subplots_adjust(bottom=0.07, right=0.98, left=0.01, top=0.35, wspace=0.01)
cbar_ax = fig.add_axes([0.15, 0.02, 0.7, 0.01])
axes = (ax1, ax2, ax3, ax4)
hexbin_data = []
gridsize = 10
vmin = 0.0001
vmax = 0.1
nticks = 10
YMax, XMax = 260, 70 * 2.5**2  # duration [min], area [km^2] plot limits

# First pass: throw-away hexbins, used only to obtain each dataset's total
# bin count so every point can be weighted by 1/N in the second pass
# (turning raw counts into a density).
# BUGFIX: the original iterated over (ax1, ax2, ax3, ax3) -- the duplicated
# ax3 meant the 4th dataset's weights came from a hexbin drawn on the wrong
# axis; the loop now covers all four axes.
for i, ax in enumerate(axes):
    data = histdata[i][var[:2]]
    X = data[var[0]].values
    Y = data[var[1]].values
    ax.set_ylim(0, YMax)
    ax.set_xlim(0, XMax)
    hb = ax.hexbin(X, Y, gridsize=gridsize, extent=(0, XMax, 0, YMax))
    # np.float was a deprecated alias of the builtin float (removed in
    # NumPy 1.20); use the builtin directly.
    hexbin_data.append(np.ones_like(Y, dtype=float) / hb.get_array().sum())

plt.cla()
medians = OrderedDict()

# Second pass: density-normalised hexbin plus median cross-hairs per panel.
for i, ax in enumerate(axes):
    data = histdata[i][var[:2]]
    X = data[var[0]].values
    Y = data[var[1]].values
    ax.set_ylim(0, YMax)
    ax.set_xlim(0, XMax)
    # C + reduce_C_function=np.sum accumulates the 1/N weights per hexagon,
    # i.e. the fraction of all storms falling in that bin.
    im = ax.hexbin(X, Y, gridsize=gridsize, cmap=colm, marginals=False,
                   extent=(0, XMax, 0, YMax), vmin=vmin, vmax=vmax,
                   C=hexbin_data[i], reduce_C_function=np.sum)

    ax.set_title(titles[i], fontsize=24)
    ax.grid(color='w', linestyle='', linewidth=0)
    ax.tick_params(labelsize=24)
    # Drop the last x tick so neighbouring panels' labels do not collide.
    ax.xaxis.set_ticks(ax.xaxis.get_ticklocs()[:-1])
    # Medians of area (x), duration (y), max and mean rain-rate for this panel.
    x, y = histdata[i][var[0]].median(), histdata[i][var[1]].median()
    z, zz = histdata[i][var[2]].median(), histdata[i][var[-1]].median()
    medians[titles[i]] = ('%2.1f km^2, %2i min (max: %2.1f mm/h, '
                          'mean: %2.1f mm/h);' % (x, y, z, zz))
    # Cross-hair at the joint median. Scalars suffice for hlines/vlines; the
    # original passed arrays of the repeated value, which drew the same
    # segment many times.
    ax.hlines(y, 0, x, 'firebrick', lw=3)
    ax.vlines(x, 0, y, 'firebrick', lw=3)
    ax.scatter([x], [y], marker='o', s=400, c='firebrick', alpha=0.8)
    if i == 0:
        ax.set_ylabel('Duration [min]', fontsize=24)
    ax.set_xlabel('Rainfall Area [km$^2$]', fontsize=24)

# Convert the last panel's bin values to percentages for the colour bar.
ary = im.get_array() / im.get_array().sum() * 100
# BUGFIX: the original wrote `im.set_array = ary`, overwriting the method with
# an array instead of calling it -- a silent no-op. Calling it updates the
# mapped data as intended. NOTE(review): with percentage values the fixed
# vmin/vmax (0.0001-0.1) may need rescaling -- confirm desired colour limits.
im.set_array(ary)
cbar = fig.colorbar(im, cax=cbar_ax, orientation='horizontal')
cbar.ax.tick_params(labelsize=24)
cbar.set_label('Density [ ]', size=24)
plt.show()
<matplotlib.figure.Figure at 0x7f3195ce4c50>
In [696]:
# Convert the CubeYF colour map (8 discrete levels, gamma=2.0) to a
# plotly-style colour list; `colm` is later indexed per ensemble member
# (e.g. colm[ii] in the track plots) -- rgb=False presumably returns hex/named
# colours rather than rgb tuples, TODO confirm against matplotlib_to_plotly.
colm = matplotlib_to_plotly(CubeYF_20.get_mpl_colormap(N=8, gamma=2.0),8, rgb=False)

3. Storm tracks above the 80th percentiles

  • marks start of track, colors for different ensemble member
In [588]:
#Plot the tracks
# Plot storm tracks above the 80th rain-rate percentile for CPOL and the two
# UM ensembles: top row shows all tracks above the threshold, bottom row the
# selected individual storms (particles=Id). Colours distinguish members.
mpld3.enable_notebook()
fig = plt.figure(figsize=(10,7))
fig.subplots_adjust(right=0.94, bottom=0.45, top=0.95,left=0.01, hspace=0, wspace=0)
#cbar_ax = fig.add_axes([0.12, 0.17, 0.74, 0.02])
# Wrap the single-member CPOL data in the same (dataset, percentiles)
# namedtuple shape as the UM ensembles so the loop below treats all alike.
o = namedtuple('Sim', 'dataset percentiles')
tmp_obs = o({'obs': CPOL_t.dataset['obs']}, {'obs': CPOL_t.percentiles['obs']})

# Grid coordinates for the basemap, read from the CPOL file.
with nc(CPOLF) as fnc:
    lon=fnc.variables['longitude'][:]
    lat=fnc.variables['latitude'][:]
tp = 'mean'   # which statistic's percentile to threshold on
num=80        # percentile threshold (80th)
titels = ['CPOL', 'UM 1.33km', 'UM 0.44km']
m = None
for i, data in enumerate(((tmp_obs, storm_obs), (UM133_t, storm_UM133), (UM044_t, storm_UM044))):
    tracks, stormId = data
    ax = fig.add_subplot(2,3,i+1)    # top row: thresholded tracks
    ax2 = fig.add_subplot(2,3,i+4)   # bottom row: selected storm Ids
    ax.set_title(titels[i], fontsize=18)
    for ii, tr in enumerate(tracks.dataset.keys()):
        # Draw the (expensive) basemap only for the first member; subsequent
        # members reuse the map objects returned by plot_traj.
        if ii == 0:
            draw_map = None
            m2 = None
        else:
            draw_map = m
        Id = [stormId.Sim[tr]]
        perc = tracks.percentiles[tr][tp][num]
        ax, m, im = plot_traj(tracks.dataset[tr], lon, lat, ax=ax, mintrace=2, thresh_val=perc, thresh='mean', 
                  color=colm[ii], draw_map=draw_map, basemap_res='f', lw=0.5, size=20, particles=None)
        # thresh_val=-1 disables thresholding; only the storms in `Id` are drawn.
        ax2, m2, im = plot_traj(tracks.dataset[tr], lon, lat, ax=ax2, mintrace=2, thresh_val=-1, thresh='mean', 
                  color=colm[ii], draw_map=m2, basemap_res='f', lw=0.5, size=20, particles=Id)
        #break
plt.show()

4. Classification of storm area, rain-rate and duration by rain-rate quintiles

In [721]:
mpld3.disable_notebook()
# Box plots of storm duration, area and mean rain-rate, grouped by rain-rate
# quintile (x) and simulation/observation run (hue), one stacked panel per
# variable. `df_stack` is the long-format frame built elsewhere in the notebook.
# Each value maps a column name to (axis label, upper y limit).
variables=dict(dur=('Duration [min]',300), avg_area=('Area [km$^2$]', 100*2.5**2), avg_mean=('Rain-rate [mm/h]',12))
fig = plt.figure()
fig.subplots_adjust(right=0.94, bottom=0.025, top=0.6,left=0.01, hspace=0, wspace=0)
i = 0
for y, prop in variables.items():
    label, ylim = prop
    npl = len(list(variables.keys()))  # number of stacked panels (3)
    i += 1
    ax = fig.add_subplot(npl,1,i)
    ax = sns.boxplot(x="quant", y=y, hue="run", data=df_stack, palette="muted", ax=ax)
    #ax = sns.stripplot(x="quant", y=y, hue="run", data=df_stack, jitter=True, palette="Set2", dodge=True)
    ax.legend(loc=0, fontsize=24)
    ax.tick_params(labelsize=24)
    ax.set_xlim(0.5,5.5)
    ax.set_ylim(0,ylim)
    # Drop the bottom-most y tick so stacked panels do not overlap labels.
    ax.yaxis.set_ticks(ax.yaxis.get_ticklocs()[1:])
    ax.set_xlabel('Quintiles', fontsize=26)
    ax.set_ylabel(label, fontsize=26)

5. Stormtrack Percentiles vs Rain-rate

In [722]:
# Percentiles 0..98 in 1% steps, then a refined upper tail (99 .. 100).
P = list(np.arange(0,99))+[99, 99.9, 99.99, 99.999, 100]

# Rain-rate value at each percentile, per dataset; the percentile levels
# themselves become the index (and the x axis below).
PERC = pd.DataFrame({'Obs':np.percentile(CPOL_pdf2['avg_mean'].values, P), 
                     'UM 1.33km': np.percentile(UM133_pdf['avg_mean'].values, P),
                      'UM 0.44km': np.percentile(UM044_pdf['avg_mean'].values, P)},index=P)

# NOTE(review): inset_axes is imported but not used in this cell.
from mpl_toolkits.axes_grid.inset_locator import inset_axes
#Plot the percentages
fig=plt.figure()
ax = fig.add_subplot(111)
plt.subplots_adjust(bottom=0.05, right=0.9, top=0.4)
#ax.plot(PERC.index, PERC['Obs'].values, color='lightseagreen', linestyle='-', label='CPOL',lw=3)
ax.plot(PERC.index, PERC['UM 1.33km'].values, color='fuchsia', linestyle='--', label='UM 1.33km',lw=3)
ax.plot(PERC.index, PERC['UM 0.44km'].values, color='fuchsia', linestyle='-', label='UM 0.44km', lw=3)
# `member` comes from the bootstrap cell elsewhere in the notebook; the band
# spans the min-max envelope across bootstrap members, the line is their mean.
ax.fill_between(PERC.index, member.min(axis=0), member.max(axis=0), color='cornflowerblue', alpha=0.2)
ax.plot(PERC.index, member.mean(axis=0), color='lightseagreen', linestyle='-', label='Cpol',lw=3)
ax.set_xlim(1,100)
#ax.set_ylim(10,100)
#ax.set_yscale('log')
#ax.set_xscale('log')
ax.set_xlabel('Percentile [\%]', fontsize=26)
ax.set_ylabel('Rain-rate [mm/h]', fontsize=26)
ax.legend(loc=0, fontsize=24)
ax.tick_params(labelsize=24)
# linewidth=0 effectively disables the grid while keeping tick labels.
ax.grid(color='r', linestyle='-', linewidth=0)
In [595]:
#Bootstrapping stuff:

7. Individual Cases (12/11 2006)

In [597]:
#Read the Simulation data
# Load the UM ensemble rainfall output (1.33 km and 0.44 km) and the gridded
# observation products for the November 2006 case, restricted to the time
# window common to all ensemble members, then stack the UM members along a
# new 'ens' dimension.
with nc(CPOLF) as fnc:
    lon=fnc.variables['longitude'][:]
    lat=fnc.variables['latitude'][:]
## First define the file names of the simulations
UMdir = os.path.join(os.getenv('HOME'), 'Data', 'Extremes', 'UM', 'darwin', 'RA1T')
# Ensemble members are identified by their initialisation time stamps.
ensembles = ('20061109T1200Z', '20061109T1800Z', '20061110T0000Z', '20061110T0600Z', '20061110T1200Z',
             '20061110T1800Z',  '20061111T0000Z', '20061111T1200Z')
Simend = '20061119_0600-%s'%remap_res
### Define the resolutions we have with help of a named tuple
gridded = namedtuple('Simulation', 'member dataset resolution')
UM133_grid = gridded(member=ensembles, dataset=[], resolution=1.33)
UM044_grid = gridded(member=ensembles, dataset=[], resolution=0.44)
OBS_grid = gridded(member=['CPOL'], dataset=[], resolution=2.55)
# Now construct the filenames of the UM - rainfall ouput and get the overlapping time periods
# First sweep: only read each member's time axis to find the common window.
start, end = [], []
for ens in ensembles:
    date = datetime.strptime(ens,'%Y%m%dT%H%MZ')
    umf133 = 'um-1p33km-%s-rain_%s-%s.nc' %(date.strftime('%m%d%H%M'), date.strftime('%Y%m%d_%H%M'), Simend)
    umf044 = 'um-0p44km-%s-rain_%s-%s.nc' %(date.strftime('%m%d%H%M'), date.strftime('%Y%m%d_%H%M'), Simend)
    time133 = xr.open_dataset(os.path.join(UMdir,ens,'darwin','1p33km', umf133)).coords['t'].values
    time044 = xr.open_dataset(os.path.join(UMdir,ens,'darwin','0p44km', umf044)).coords['t'].values
    start.append((time133[0], time044[0]))
    end.append((time133[-1], time044[-1]))
# Overlap window: latest start and earliest end across members,
# kept separately per resolution (column 0 = 1.33 km, column 1 = 0.44 km).
end = np.min(np.array(end), axis=0)
start = np.max(np.array(start), axis=0)
# Now get the datasets for these overlapping time periods
for nn, ens in enumerate(ensembles):
    date = datetime.strptime(ens,'%Y%m%dT%H%MZ')
    umf133 = 'um-1p33km-%s-rain_%s-%s.nc' %(date.strftime('%m%d%H%M'), date.strftime('%Y%m%d_%H%M'), Simend)
    umf044 = 'um-0p44km-%s-rain_%s-%s.nc' %(date.strftime('%m%d%H%M'), date.strftime('%Y%m%d_%H%M'), Simend)
    for file, res, ntuple, n in ((umf133, '1p33km', UM133_grid, 0), (umf044, '0p44km', UM044_grid, 1)):
        fn = os.path.join(UMdir, ens,'darwin', res, file)
        # Round time stamps to the minute before locating the window indices,
        # to absorb sub-minute offsets between members.
        with xr.open_dataset(fn) as ds:
            um_times = pd.DatetimeIndex(ds.coords['t'].values).round('1min')
            iloc_s = abs(um_times - pd.Timestamp(start[n])).argmin()
            iloc_e = abs(um_times - pd.Timestamp(end[n])).argmin()
            
        # NOTE(review): the file is opened a second time here (outside the
        # with-block) and the handle is kept in the namedtuple list.
        ntuple.dataset.append(xr.open_dataset(fn).isel(t=list(range(iloc_s,iloc_e+1)), surface=[0]))
        ntuple.dataset[-1].coords['t'] = um_times[iloc_s:iloc_e+1].round('5min')
# Now get the overlapping time periods for the observations


for nn,file in enumerate((WOHF, WOHFv1, CMORPHF, WOHFv2)):
    # Use netCDF4 to decode the time axis, then reopen with xarray.
    with nc(file) as ds:
        obs_time = pd.DatetimeIndex(num2date(ds.variables['t'][:], ds.variables['t'].units))
        iloc_s = abs(obs_time - pd.Timestamp(start[0])).argmin()
        iloc_e = abs(obs_time - pd.Timestamp(end[0])).argmin()

    ds = xr.open_dataset(file)
    ds.coords['t'] = obs_time
    if nn > 0:
        # Non-CPOL products cover a larger domain: clip to the CPOL lon/lat box.
        # 2D coordinate arrays (IndexError fallback handles 1D coordinates).
        try:
            slon = np.fabs(ds.variables['longitude'].values[0,:] - min(lon)).argmin()
            elon = np.fabs(ds.variables['longitude'].values[0,:] - max(lon)).argmin()+1
            slat = np.fabs(ds.variables['latitude'].values[:,0] - min(lat)).argmin()
            elat = np.fabs(ds.variables['latitude'].values[:,0]- max(lat)).argmin()+1
        except IndexError:
            slon = np.fabs(ds.variables['longitude'].values[:] - min(lon)).argmin()
            elon = np.fabs(ds.variables['longitude'].values[:] - max(lon)).argmin()+1
            slat = np.fabs(ds.variables['latitude'].values[:] - min(lat)).argmin()
            elat = np.fabs(ds.variables['latitude'].values[:]- max(lat)).argmin()+1
        # NOTE(review): longitude indices (slon/elon) are passed to the `y`
        # dimension and latitude indices to `x` -- confirm this matches the
        # files' dimension naming; it looks possibly swapped.
        try:
            OBS_grid.dataset.append(ds.isel(t=list(range(iloc_s, iloc_e+1)), y=range(slon,elon), x=range(slat,elat)))
        except ValueError:
            OBS_grid.dataset.append(ds.isel(t=list(range(iloc_s, iloc_e+1)), longitude=range(slon,elon), latitude=range(slat,elat)))
    else:
        OBS_grid.dataset.append(ds.isel(t=list(range(iloc_s, iloc_e+1))))
# Stack the ensemble members along a new 'ens' dimension.
UM133ens_grid = xr.concat(UM133_grid.dataset, dim='ens')
UM044ens_grid = xr.concat(UM044_grid.dataset, dim='ens')
In [598]:
# Now make a Hovmöller plot (longitude vs. local time) of the case-study data:
# one observation panel plus a panel per UM ensemble member, each split into
# 1.33 km (left) and 0.44 km (right) sub-panels, rain-rate averaged over
# the meridional (lat) dimension.
mpld3.disable_notebook()
cmap = colmap2.Blues
cmap.set_under('w')
cmap.set_bad('w')
# NOTE(review): imports mid-notebook; `product` appears unused in this cell.
import matplotlib.gridspec as gridspec
from itertools import product

## Define start and end time
start_time = pd.DatetimeIndex([datetime(2006,11,12,12,0, tzinfo=timezone)])[0]
end_time = pd.DatetimeIndex([datetime(2006,11,12,18,0, tzinfo=timezone)])[0]
hfmt = dates.DateFormatter('%H')  # show only the hour on the time axis
## Get the information for cpol obs
# NOTE(review): dy1 is recomputed three times in this cell but never used.
dy1 = len(OBS_grid.dataset[1].coords['x'])//2
# Meridional mean rain-rate; the [...,:-20,:] slice trims the last 20 rows
# (presumably a domain edge -- TODO confirm).
obs = OBS_grid.dataset[1].variables['lsrain'][...,:-20,:].mean(dim=('x'))
obs_tsteps = pd.DatetimeIndex(OBS_grid.dataset[1].coords['t'].values).tz_localize(utc).tz_convert(timezone)
obs_lon = OBS_grid.dataset[1].variables['longitude'][0,:]
# Indices of the time steps closest to the requested window bounds.
sIdx = np.fabs((obs_tsteps - start_time).total_seconds().values).argmin()
eIdx = np.fabs((obs_tsteps - end_time).total_seconds().values).argmin()+1
obs_tsteps = obs_tsteps[sIdx:eIdx]
# Mask zero/negative rain so it renders white (cmap.set_bad above).
obs_data = np.ma.masked_less_equal(obs[sIdx:eIdx].values, 0.000)
#sIdx, eIdx = 0, -1

## Get the information from the simulations
### 044 SIM
dy1 = len(UM044ens_grid.coords['lat'])//2
um_tsteps1 = pd.DatetimeIndex(UM044ens_grid.coords['t'].values).tz_localize(utc).tz_convert(timezone) 
um1 = UM044ens_grid.variables['lsrain'][...,:-20,:].mean(dim=('lat','surface'))
sIdx = np.fabs((um_tsteps1 - start_time).total_seconds().values).argmin()
eIdx = np.fabs((um_tsteps1 - end_time).total_seconds().values).argmin()+1
um_data1 = np.ma.masked_less_equal(um1[:,sIdx:eIdx].values, 0.00)
um_lon1 = UM044ens_grid.coords['lon'][:]
um_tsteps1 = um_tsteps1[sIdx:eIdx]
tit = ['Obs']
## 133 SIM
dy1 = len(UM133ens_grid.coords['lat'])//2
um_tsteps2 = pd.DatetimeIndex(UM133ens_grid.coords['t'].values).tz_localize(utc).tz_convert(timezone) 
um2 = UM133ens_grid.variables['lsrain'][...,:-20,:].mean(dim=('lat','surface'))
sIdx = np.fabs((um_tsteps2 - start_time).total_seconds().values).argmin()
eIdx = np.fabs((um_tsteps2 - end_time).total_seconds().values).argmin()+1
um_data2 = np.ma.masked_less_equal(um2[:,sIdx:eIdx].values, 0.00)
um_lon2 = UM133ens_grid.coords['lon'][:]
um_tsteps2 = um_tsteps2[sIdx:eIdx]
## Stack the simulations together
# Order matters: index 0 = 1.33 km (left sub-panel), index 1 = 0.44 km (right).
um_data = [(um_data2, um_lon2, um_tsteps2) , (um_data1, um_lon1, um_tsteps1)]

#fig, ax = plt.subplots(3,3, sharey=True, sharex=True)
fig = plt.figure()
#ax=ax.ravel()


# 3x3 outer grid: slot 0 is observations, slots 1-8 the ensemble members;
# each slot holds a 1x2 inner grid for the two model resolutions.
outer_grid = gridspec.GridSpec(3, 3, wspace=0.0, hspace=0.12)
inner_grid = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=outer_grid[0], wspace=0.0, hspace=0.0)
ax = plt.Subplot(fig, inner_grid[0])
# tz_localize(None) strips tz info so matplotlib treats times as naive local.
im = ax.pcolormesh(obs_lon, obs_tsteps.tz_localize(None), obs_data,vmin=0.0,vmax=5,cmap=cmap)

fig.add_subplot(ax)

ax.set_title('Obs', fontsize=24)
ax.tick_params(labelsize=24)
ax.xaxis.set_ticks(list(ax.xaxis.get_ticklocs()[:-1][::3]))
#ax.set_ylabel('Local Time', fontsize=28)
ax.yaxis.set_major_formatter(hfmt)
add = ('UM 1.33km', 'UM 0.44km')
for i in range(1,9):
    inner_grid = gridspec.GridSpecFromSubplotSpec(1, 2, subplot_spec=outer_grid[i], wspace=0.0, hspace=0.0)
    for j, data in enumerate(um_data):

        um, lon, tsteps = data
        ax = plt.Subplot(fig, inner_grid[j])
        m = ax.pcolormesh(lon, tsteps.tz_localize(None), um[i-1],vmin=0.0,vmax=5,cmap=cmap)
        # Only the bottom row (outer slots 6-8) gets x tick labels.
        if i in (8,7,6):
            ax.tick_params(labelsize=24)
            ticks =[' ']+list(ax.xaxis.get_ticklocs()[1:][::3].round(1).astype(str))
            #ax.xaxis.set_ticks(ticks)
            ax.xaxis.set_ticks(list(ax.xaxis.get_ticklocs()[:-1][::3]))
        else:
            ax.set_xticks([])
        # Panel title: resolution + ensemble member number as LaTeX roman numeral.
        tit = add[j]+ r' (\rom{{{}}})'.format(i)
        #tit = add[j]+datetime.strptime(ensembles[i-1], '%Y%m%dT%H%MZ').strftime(' (%e %b %H UTC)')
        #if j == 0:
            
        # Only the left sub-panel of the left column (slots 3 and 6) keeps
        # y (time) tick labels.
        if i == 3 or i == 6:
            if j == 0:
                #ax.set_ylabel('Local Time', fontsize=28)
                ax.tick_params(labelsize=24)
                ax.yaxis.set_major_formatter(hfmt)
            else:
                ax.set_yticks([])
        else:
            ax.set_yticks([])
        ax.set_title(tit, fontsize=24)


        fig.add_subplot(ax)
fig.subplots_adjust(right=0.98, bottom=0.25, top=0.99,left=0.01, hspace=0.1, wspace=0)
# Shared horizontal colour bar below all panels (uses the obs panel's mappable).
cbar_ax = fig.add_axes([0.14, 0.20, 0.74, 0.01])
cbar=fig.colorbar(im, cax=cbar_ax, orientation='horizontal')
cbar.ax.tick_params(labelsize=24)
cbar.set_label('Avg. Rain-rate [mm/h]',size=24)
fig.text(0.5, 0.215, 'Longitude', ha='center', fontsize=30)
fig.text(-0.03, 1-0.38, 'Local Time (12. Nov. 2006)', va='center', rotation='vertical', fontsize=30)
#for i in (6, 7, 8):
#    ax[i].set_xlabel('Longitude', fontsize=28)
Out[598]:
<matplotlib.text.Text at 0x7f3195d9ea90>
In [601]:
# Median storm-track properties for the individual case (12 Nov 2006):
# area, duration, distance, mean/max rain-rate plus a scaled maximum distance
# and the storm count per dataset.
um044_n = len(UM044_trackData.coords['dim_0'])
um133_n = len(UM133_trackData.coords['dim_0'])
cpol_n = len(CPOL_trackData.coords['dim_0'])

# Scaled distance estimates; the *2 / *1.5 factors are not explained in this
# cell -- TODO document their origin (presumably a grid-spacing conversion).
cpol_d = round(CPOL_trackData.to_dataframe()['dist'].max(),2)*2
um044_d = np.array(UM044_trackData['dist'].max(axis=1).median()).round(2)*1.5
um133_d = np.array(UM133_trackData['dist'].max(axis=1).median()).round(2)*1.5


var=['avg_area','dur', 'dist', 'avg_mean', 'max_mean']
# NOTE(review): 'distance' appears twice in the index -- row 3 is the median
# track distance, row 6 the scaled maximum distance appended above; the
# duplicated label makes label-based row access ambiguous.
names=['area', 'duration', 'distance', 'avg-rain', 'max-rain', 'distance','# storms']
#print(CPOL_pdf[var].median())
# NOTE(review): as in the earlier medians cell, the 'b'/'c'/'d' keys rely on
# pandas sorting dict keys alphabetically; insertion order is b,d,c, so on
# pandas >= 0.23 / Python >= 3.6 the UM column labels below would be swapped
# -- verify against the pandas version used.
medians = pd.DataFrame({'b': list(CPOL_trackData[var].to_dataframe().median().values.round(2))+[cpol_d,int(cpol_n)],
                        'd': list(UM044_trackData[var].to_dataframe().median().values.round(2))+[um044_d,int(um044_n)],
                        'c': list(UM133_trackData[var].to_dataframe().median().values.round(2))+[um133_d,int(um133_n)]} )
                       #index=('Area','Duration', 'Mean-Rain', 'Max-Rain'))
medians.index=names
medians.columns=['Cpol', 'UM 1.33km', 'UM 0.44km']
#print('Medians:')               
# Bare last expression: display the rounded table as rich cell output.
medians.round(2)
Out[601]:
Cpol UM 1.33km UM 0.44km
area 99.31 83.37 58.33
duration 121.60 70.00 50.00
distance 41.81 29.09 22.83
avg-rain 4.69 5.47 6.21
max-rain 7.09 7.86 10.21
distance 189.40 110.23 87.02
# storms 3.00 5.00 7.00